Introduction

SpatialPCA is a spatially aware dimension reduction method that aims to infer a low dimensional representation of the gene expression data in spatial transcriptomics. SpatialPCA builds upon the probabilistic version of PCA, incorporates localization information as additional input, and uses a kernel matrix to explicitly model the spatial correlation structure across tissue locations.

Example: ST breast tumor data

Load in processed data: gene expression and location

library(SpatialPCA)
library(ggplot2)
library(bluster)
library(ggpubr)
library(slingshot)

load("ST_tumor_example.RData")
ls()
## [1] "expr" "info"

Scale expression of each gene, and prepare data matrices needed in SpatialPCA. Here we set number of low dimensional components to be 20.

expr=scale_expr(expr)
dat = data_prepare_func(expr, info)

Then we select bandwidth for Gaussian kernel.

bandwidth = bandwidth_select(expr, info,"SJ")
K=kernel_build(kernelpara="gaussian", dat$ED2,bandwidth) 

Run SpatialPCA functions to extract spatial PCs:

Est_para = SpatialPCA_estimate_parameter(dat_input=dat,PCnum=20)
Est_W = SpatialPCA_estimate_W(Est_para$par, dat,PCnum=20)
Est_Z = SpatialPCA_estimate_Z(Est_para$par,dat,Est_W,PCnum=20)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
dat$Est_para = Est_para
dat$Est_W = Est_W
dat$Z_spatial = Est_Z$Z_hat
walktrap_cluster_SpatialPCA = walktrap_clustering(knearest = seq(from=30,to=32,by=1), latent_dat=dat$Z_spatial)
## [1] 1
## [1] 2
## [1] 3
cbp_spatialpca <- c(  "plum1", "dodgerblue","mediumaquamarine",  "palegreen4","chocolate1","lightblue2","#F0E442",  "red","#CC79A7","mediumpurple","seagreen1")
p=list()
clusterlabel = walktrap_cluster_SpatialPCA$cluster_label
for(count in 1:length(clusterlabel)){
        colnames(info) = c("sdimx","sdimy")
        loc1 = info$sdimx
        loc2 = info$sdimy
        cluster = clusterlabel[[count]]
        datt = data.frame(cluster, loc1, loc2)
        p[[count]] = ggplot(datt, aes(x = loc1, y = loc2, color = cluster)) +
            geom_point( alpha = 1,size=3) +
            scale_color_manual(values = cbp_spatialpca)+
            theme_void()+
            theme(plot.title = element_text(size = 10,  face = "bold"),
              text = element_text(size = 10),
              legend.position = "bottom")
}
print(ggarrange(p[[1]], p[[2]],p[[3]],
          ncol = 3, nrow = 1))

Compare with PCA clustering

dat$Z_pca=get_PCA(expr, PCnum=20)
walktrap_cluster_PCA = walktrap_clustering(knearest = seq(from=32,to=34,by=1), latent_dat=dat$Z_pca)
## [1] 1
## [1] 2
## [1] 3
cbp_pca <- c(  "plum1", "chocolate1","dodgerblue",  "palegreen4",  "lightblue2",
           "#F0E442","mediumaquamarine",  "red","#CC79A7","mediumpurple","seagreen1")
p=list()
clusterlabel = walktrap_cluster_PCA$cluster_label
for(count in 1:length(clusterlabel)){
        colnames(info) = c("sdimx","sdimy")
        loc1 = info$sdimx
        loc2 = info$sdimy
        cluster = clusterlabel[[count]]
        datt = data.frame(cluster, loc1, loc2)
        p[[count]] = ggplot(datt, aes(x = loc1, y = loc2, color = cluster)) +
            geom_point( alpha = 1,size=3) +
            scale_color_manual(values = cbp_pca)+
            theme_void()+
            theme(plot.title = element_text(size = 10,  face = "bold"),
              text = element_text(size = 10),
              legend.position = "bottom")
}
print(ggarrange(p[[1]], p[[2]],p[[3]],
          ncol = 3, nrow = 1))

Spatial trajectory inference.

meta_data = info
meta_data$SpatialPCA_Walktrap = walktrap_cluster_SpatialPCA$cluster_label[[2]]
sim<- SingleCellExperiment(assays = expr)
reducedDims(sim) <- SimpleList(DRM = t(dat$Z_spatial))
colData(sim)$Walktrap <- factor(meta_data$SpatialPCA_Walktrap)    
sim  <-slingshot(sim, clusterLabels = 'Walktrap', reducedDim = 'DRM')
summary(sim@colData@listData)
##                   Length Class              Mode   
## Walktrap          607    factor             numeric
## slingshot         607    PseudotimeOrdering S4     
## slingPseudotime_1 607    -none-             numeric
## slingPseudotime_2 607    -none-             numeric
## slingPseudotime_3 607    -none-             numeric
pseudotime_traj1 = sim@colData@listData$slingPseudotime_1
pseudotime_traj2 = sim@colData@listData$slingPseudotime_2
pseudotime_traj3 = sim@colData@listData$slingPseudotime_3
clusterlabels = meta_data$SpatialPCA_Walktrap
gridnum = 10
color_in = c(  "plum1", "dodgerblue","mediumaquamarine",  "palegreen4", "chocolate1",
            "lightblue2","#F0E442",  "black","#CC79A7","mediumpurple","seagreen1")

p_traj1 = plot_trajectory(pseudotime_traj1, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=1,textsize=15 )
p_traj2 = plot_trajectory(pseudotime_traj2, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=1,textsize=15 )
p_traj3 = plot_trajectory(pseudotime_traj3, info,clusterlabels,gridnum,color_in,pointsize=4 ,arrowlength=0.2,arrowsize=1,textsize=15 )


print(ggarrange( p_traj1[[4]],p_traj2[[4]],p_traj3[[4]],p_traj1[[1]],p_traj2[[1]],p_traj3[[1]],
          ncol = 3, nrow = 2))

Z_high = high_resolution(info, K, kernelpara="gaussian",ED=dat$ED, est_tau = dat$Est_para$par,est_W = Est_W[[1]] ,est_sigma0 = Est_W[[2]][1,1],est_Z = Est_Z,PCnum=20)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
#dat$Z_star = Z_high$Z_star
walktrap_SpatialPCA_highresolution = walktrap_clustering(knearest = 84, Z_high$Z_star)
## [1] 1
cbp2 <- c(  "plum1", "palegreen4","mediumaquamarine",  "dodgerblue", "chocolate1",
            "#F0E442","lightblue2",  "red","#CC79A7","mediumpurple","seagreen1")

loc1=unlist(Z_high$Location_star[,1])
loc2=unlist(Z_high$Location_star[,2])
clusters = walktrap_SpatialPCA_highresolution$cluster_label[[1]]
p3 = plot_cluster(loc1,loc2,clusters,pointsize=2,text_size=10 ,"",cbp2)
print(p3)